import requests
from fake_useragent import UserAgent
from lxml import etree
# Scrape one article page: print its body text, its title, the image URLs
# and a caption-based name for every image.
url = 'http://www.cnafun.moa.gov.cn/zt/gjcr/202005/t20200518_6344419.html'
headers = {
    "User-Agent": UserAgent().random
}
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
response.raise_for_status()
# Decode the body as UTF-8 rather than relying on requests' charset guess.
response.encoding = 'utf-8'
e = etree.HTML(response.text)

title = e.xpath('//h3[@id="title"]/text()')
content = e.xpath('//div[@class="TRS_Editor"]//p[@align="justify"]')

# string(.) returns the full text of a node, including text wrapped in
# nested tags. Paragraphs whose text is empty are skipped.
content2 = []
for c in content:
    paragraph_text = c.xpath('string(.)')
    if paragraph_text != '':
        content2.append(paragraph_text)

footer_content = e.xpath('//div[@class="TRS_Editor"]/div/span/span/text()')
# extend (not append): footer_content is a list of strings; appending it
# would nest the list and print its Python repr instead of the footer text.
content2.extend(footer_content)

content_str = "".join(content2)
print(content_str)

img_urls = e.xpath('//div[@class="TRS_Editor"]//p[@align="center"]/img/@src')
img_names = e.xpath('//p[@align="center"]/span/strong/text()')
print(title)
print(img_urls)

# This caption is matched by a different path (<b><span>) than the others;
# it is placed at the front of the caption list.
temp = e.xpath('string(//p[@align="center"]/b/span)')
img_names.insert(0, temp)
print(img_names)

# Prefix every caption with the article title to build the image name.
title_text = "".join(title)
for img_name in img_names:
    img_name = title_text + img_name
    print(img_name)














